{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 高斯混合聚类 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "CSDN csdn_inside: [GMM 高斯混合模型 聚类 Python实现](https://blog.csdn.net/csdn_inside/article/details/85267341)\n",
    "\n",
    "[西瓜书数据集4.0](https://blog.csdn.net/xxliu_csdn/article/details/86483816)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div><img src=\"./img/0901.png\", width=900>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:36.584810Z",
     "start_time": "2020-01-29T13:27:36.178861Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.697</td>\n",
       "      <td>0.460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.774</td>\n",
       "      <td>0.376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.634</td>\n",
       "      <td>0.264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.608</td>\n",
       "      <td>0.318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.556</td>\n",
       "      <td>0.215</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       a      b\n",
       "0  0.697  0.460\n",
       "1  0.774  0.376\n",
       "2  0.634  0.264\n",
       "3  0.608  0.318\n",
       "4  0.556  0.215"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "data=pd.read_csv('data_watermelon/watermelon_4.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:36.595748Z",
     "start_time": "2020-01-29T13:27:36.586772Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 30 entries, 0 to 29\n",
      "Data columns (total 2 columns):\n",
      "a    30 non-null float64\n",
      "b    30 non-null float64\n",
      "dtypes: float64(2)\n",
      "memory usage: 560.0 bytes\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:36.604723Z",
     "start_time": "2020-01-29T13:27:36.598740Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "30"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 数据信息\n",
    "n_samples=data.shape[0]\n",
    "n_samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:36.674536Z",
     "start_time": "2020-01-29T13:27:36.670547Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_features = data.shape[1]\n",
    "n_features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div><img src=\"./img/0902.png\", width=900>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:36.990690Z",
     "start_time": "2020-01-29T13:27:36.987697Z"
    }
   },
   "outputs": [],
   "source": [
    "# 设置超参数\n",
    "n_clusters = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:37.174199Z",
     "start_time": "2020-01-29T13:27:37.168214Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.33333333, 0.33333333, 0.33333333])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 初始化设置\n",
    "import numpy as np\n",
    "alpha = np.ones(n_clusters)/n_clusters\n",
    "alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:37.342957Z",
     "start_time": "2020-01-29T13:27:37.336971Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.403, 0.237],\n",
       "       [0.714, 0.346],\n",
       "       [0.532, 0.472]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mu = np.array([[.403,.237],[.714,.346],[.532,.472]])\n",
    "mu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:37.536891Z",
     "start_time": "2020-01-29T13:27:37.529884Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[0.1, 0. ],\n",
       "        [0. , 0.1]],\n",
       "\n",
       "       [[0.1, 0. ],\n",
       "        [0. , 0.1]],\n",
       "\n",
       "       [[0.1, 0. ],\n",
       "        [0. , 0.1]]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sigma = np.full((n_clusters,n_features,n_features),np.diag(np.full(n_features,0.1)))\n",
    "sigma"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多元高斯分布"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div><img src=\"./img/0906.png\", width=700>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "计算数组的行列式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:38.402924Z",
     "start_time": "2020-01-29T13:27:38.398928Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.010000000000000004"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linalg.det(sigma[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "行列式的结果开根号"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:38.762927Z",
     "start_time": "2020-01-29T13:27:38.757937Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10000000000000002"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pow(np.linalg.det(sigma[0]),0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:38.942513Z",
     "start_time": "2020-01-29T13:27:38.936525Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30, 2)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = data.values\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:39.124026Z",
     "start_time": "2020-01-29T13:27:39.118042Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.697, 0.46 ])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:39.379381Z",
     "start_time": "2020-01-29T13:27:39.309528Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.294, 0.223])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(X[0] - mu[0]).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:39.663204Z",
     "start_time": "2020-01-29T13:27:39.502167Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.805641057775526"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算一个多元高斯分布\n",
    "# 其中x mu 1*2 ，sigma 2*2\n",
    "def multiGaussian(x,mu,sigma):\n",
    "    a = 1/((2*np.pi)*pow(np.linalg.det(sigma),0.5))\n",
    "    b = np.exp(-0.5*(x-mu).dot(np.linalg.pinv(sigma)).dot((x-mu).T))\n",
    "    return a*b\n",
    "\n",
    "multiGaussian(X[0], mu[0], sigma[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 高斯混合"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div><img src=\"./img/0907.png\", width=700>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:40.420708Z",
     "start_time": "2020-01-29T13:27:40.416703Z"
    }
   },
   "outputs": [],
   "source": [
    "gamma=np.zeros((n_samples,n_clusters))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:40.711482Z",
     "start_time": "2020-01-29T13:27:40.707461Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0.])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p=np.zeros(n_clusters)\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:40.934795Z",
     "start_time": "2020-01-29T13:27:40.927812Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0., 0., 0.])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g=np.zeros(n_clusters)\n",
    "g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:41.110325Z",
     "start_time": "2020-01-29T13:27:41.102345Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.805641057775526"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算x_0的高斯混合\n",
    "## u_0时\n",
    "p[0]=multiGaussian(X[0],mu[0],sigma[0])\n",
    "p[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:41.300816Z",
     "start_time": "2020-01-29T13:27:41.293833Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.26854701925850866"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g[0]=alpha[0]*p[0]\n",
    "g[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:41.686783Z",
     "start_time": "2020-01-29T13:27:41.663845Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30, 3)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算所有x在所有cluster上的高斯混合\n",
    "def computeGamma(X,mu,sigma,alpha,multiGaussian):\n",
    "    n_samples = X.shape[0] # 30个样本\n",
    "    n_clusters = len(alpha) # 3个聚类\n",
    "    gamma=np.zeros((n_samples,n_clusters)) # 存最后的结果，每个x对应着三个gamma\n",
    "    p=np.zeros(n_clusters) # 多元高斯概率 一个x对应三个\n",
    "    g=np.zeros(n_clusters) # a*多元高斯概率 一个x对应三个\n",
    "    for i in range(n_samples): # 每个x\n",
    "        # 对于一个x\n",
    "        for j in range(n_clusters):\n",
    "            # 对于每一个聚类\n",
    "            p[j]=multiGaussian(X[i],mu[j],sigma[j])\n",
    "            g[j]=alpha[j]*p[j]\n",
    "        for k in range(n_clusters):\n",
    "            # 每一个x，每一个聚类，算一个gamma\n",
    "            gamma[i,k]=g[k]/np.sum(g)\n",
    "            # 每个x算出三个\n",
    "    return gamma\n",
    "gamma = computeGamma(X,mu,sigma,alpha,multiGaussian)\n",
    "gamma.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## EM算法迭代更新"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div><img src=\"./img/0904.png\", width=700>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:27:57.579905Z",
     "start_time": "2020-01-29T13:27:57.573921Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.36104113, 0.32326298, 0.31569589])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 更新 alpha\n",
    "alpha = np.sum(gamma,axis=0)/n_samples\n",
    "alpha"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 更新mu 考虑一个x，一个聚类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:28:39.621710Z",
     "start_time": "2020-01-29T13:28:39.616691Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30,)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gamma[:,0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:29:16.410072Z",
     "start_time": "2020-01-29T13:29:16.405085Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(30, 1)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gamma[:,0].reshape((n_samples,1)).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:29:34.524454Z",
     "start_time": "2020-01-29T13:29:34.517473Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.15246979, 0.10062569],\n",
       "       [0.16450122, 0.07991274],\n",
       "       [0.19631956, 0.08174821],\n",
       "       [0.18419855, 0.09634069],\n",
       "       [0.2043746 , 0.07902975],\n",
       "       [0.17682102, 0.10398656],\n",
       "       [0.20907716, 0.06476611],\n",
       "       [0.18885348, 0.09118555],\n",
       "       [0.23631904, 0.03228984],\n",
       "       [0.12307463, 0.13523015],\n",
       "       [0.14713564, 0.03423156],\n",
       "       [0.18225724, 0.05260486],\n",
       "       [0.21989474, 0.05540384],\n",
       "       [0.2112001 , 0.06364934],\n",
       "       [0.14455696, 0.14857243],\n",
       "       [0.2458228 , 0.01741072],\n",
       "       [0.2313865 , 0.03314716],\n",
       "       [0.17357954, 0.09089959],\n",
       "       [0.15937127, 0.11329934],\n",
       "       [0.1386724 , 0.12637875],\n",
       "       [0.19859964, 0.06159775],\n",
       "       [0.17550091, 0.08504666],\n",
       "       [0.17680522, 0.11420958],\n",
       "       [0.15209254, 0.13904695],\n",
       "       [0.16963889, 0.1192319 ],\n",
       "       [0.14347891, 0.09342368],\n",
       "       [0.14971191, 0.13282711],\n",
       "       [0.16311597, 0.12966513],\n",
       "       [0.15398079, 0.09451235],\n",
       "       [0.14436769, 0.14857572]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(X*gamma[:,0].reshape((n_samples,1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:30:00.196747Z",
     "start_time": "2020-01-29T13:30:00.190756Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5.31717872, 2.71884969])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(X*gamma[:,0].reshape((n_samples,1)),axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:30:18.718200Z",
     "start_time": "2020-01-29T13:30:18.712211Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.831233991293141"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(gamma,axis=0)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:30:36.579234Z",
     "start_time": "2020-01-29T13:30:36.568297Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.49091163, 0.25101938])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 更新 mu_0\n",
    "mu[0]=np.sum(X*gamma[:,0].reshape((n_samples,1)),axis=0)/np.sum(gamma,axis=0)[0]\n",
    "mu[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:30:45.568030Z",
     "start_time": "2020-01-29T13:30:45.560051Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.49091163, 0.25101938],\n",
       "       [0.714     , 0.346     ],\n",
       "       [0.532     , 0.472     ]])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mu"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 更新sigma\n",
    "\n",
    "同样以gamma做权重"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:36:58.692503Z",
     "start_time": "2020-01-29T13:36:58.683527Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.02530905, 0.00413907],\n",
       "       [0.00413907, 0.01586245]])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 更新 sigma_0\n",
    "sigma[0]=0\n",
    "for j in range(n_samples):\n",
    "    sigma[0]+=(X[j].reshape((1,n_features))-mu[0]).T.dot((X[j]-mu[0]).reshape((1,n_features)))*gamma[j,0]\n",
    "sigma[0]=sigma[0]/np.sum(gamma,axis=0)[0]\n",
    "sigma[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 全部整合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:37:48.111279Z",
     "start_time": "2020-01-29T13:37:47.905629Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    " \n",
    "def multiGaussian(x,mu,sigma):\n",
    "    return 1/((2*np.pi)*pow(np.linalg.det(sigma),0.5))*np.exp(-0.5*(x-mu).dot(np.linalg.pinv(sigma)).dot((x-mu).T))\n",
    " \n",
    "def computeGamma(X,mu,sigma,alpha,multiGaussian):\n",
    "    n_samples=X.shape[0]\n",
    "    n_clusters=len(alpha)\n",
    "    gamma=np.zeros((n_samples,n_clusters))\n",
    "    p=np.zeros(n_clusters)\n",
    "    g=np.zeros(n_clusters)\n",
    "    for i in range(n_samples):\n",
    "        for j in range(n_clusters):\n",
    "            p[j]=multiGaussian(X[i],mu[j],sigma[j])\n",
    "            g[j]=alpha[j]*p[j]\n",
    "        for k in range(n_clusters):\n",
    "            gamma[i,k]=g[k]/np.sum(g)\n",
    "    return gamma\n",
    " \n",
    "class MyGMM():\n",
    "    def __init__(self,n_clusters,ITER=50):\n",
    "        self.n_clusters=n_clusters\n",
    "        self.ITER=ITER\n",
    "        self.mu=0\n",
    "        self.sigma=0\n",
    "        self.alpha=0\n",
    "      \n",
    "    def fit(self,data):\n",
    "        n_samples=data.shape[0]\n",
    "        n_features=data.shape[1]\n",
    "        '''\n",
    "        mu=data[np.random.choice(range(n_samples),self.n_clusters)]\n",
    "        '''\n",
    "        alpha=np.ones(self.n_clusters)/self.n_clusters\n",
    "        \n",
    "        mu=np.array([[.403,.237],[.714,.346],[.532,.472]])\n",
    "        \n",
    "        sigma=np.full((self.n_clusters,n_features,n_features),np.diag(np.full(n_features,0.1)))\n",
    "        \n",
    "        for i in range(self.ITER):\n",
    "            gamma=computeGamma(data,mu,sigma,alpha,multiGaussian)\n",
    "            alpha=np.sum(gamma,axis=0)/n_samples\n",
    "            for i in range(self.n_clusters):\n",
    "                mu[i]=np.sum(data*gamma[:,i].reshape((n_samples,1)),axis=0)/np.sum(gamma,axis=0)[i]\n",
    "                sigma[i]=0\n",
    "                for j in range(n_samples):\n",
    "                    sigma[i]+=(data[j].reshape((1,n_features))-mu[i]).T.dot((data[j]-mu[i]).reshape((1,n_features)))*gamma[j,i]\n",
    "                sigma[i]=sigma[i]/np.sum(gamma,axis=0)[i]\n",
    "        self.mu=mu\n",
    "        self.sigma=sigma\n",
    "        self.alpha=alpha\n",
    "        \n",
    "    def predict(self,data):\n",
    "        pred=computeGamma(data,self.mu,self.sigma,self.alpha,multiGaussian)\n",
    "        cluster_results=np.argmax(pred,axis=1)\n",
    "        return cluster_results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:37:48.640830Z",
     "start_time": "2020-01-29T13:37:48.600939Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "data=pd.read_csv('data_watermelon/watermelon_4.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:37:49.095615Z",
     "start_time": "2020-01-29T13:37:49.092619Z"
    }
   },
   "outputs": [],
   "source": [
    "# X = np.array(data.values, dtype=float)\n",
    "X = data.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-29T13:37:51.221270Z",
     "start_time": "2020-01-29T13:37:50.308369Z"
    }
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VPW9//HXZ2aysgUIiCzKjiC4RrRYt2qtS4u2rrRytdaqbblS23t/bX/tVbTea2tbKa1ef1WrdanVql2opdqKdUclKoJCUTYBBUF2SDKTmfn8/ghiSAYyCZk5mZP38/HwYc6Zk5n3cZK3J99z5nvM3RERkXCJBB1ARETan8pdRCSEVO4iIiGkchcRCSGVu4hICKncRURCSOUuIhJCKncRkRBSuYuIhFAsqBeurKz0wYMHB/XyIiIF6dVXX/3Q3fu0tF1g5T548GCqq6uDenkRkYJkZu9ms52GZUREQkjlLiISQip3EZEQUrmLiISQyl1EJIRU7iIiIZRVuZvZaWa22MyWmNl3Mzx+iZmtN7N5O/+5rP2jiohItlq8zt3MosCtwKeB1cBcM5vp7gubbPqQu0/JQUYREWmlbI7cxwNL3H2ZuyeAB4GzchtLRET2RTblPgBY1Wh59c51TZ1jZvPN7BEzG5TpiczscjOrNrPq9evXtyGuiIhkI5tytwzrvMnyX4DB7n4I8CRwT6Yncvfb3b3K3av69GlxagQRkQ7h7Q0f8r9zX+aO1+ayeuuWoONkJZu5ZVYDjY/EBwLvN97A3Tc0WrwD+PG+RxMRCd5PX3yeu+a9Sn0qRdQi3DznRaad8CkuGDsu6Gh7lc2R+1xghJkNMbNi4EJgZuMNzGz/RosTgUXtF1FEJBhvrvuAu+a9Sl0yScqdRDpFPJVk2jOzWV+zI+h4e9Viubt7EpgCPEFDaf/e3d8ys+vNbOLOza4ys7fM7A3gKuCSXAUWEcmXv769mEQy2Wx9xIynli0NIFH2spry191nAbOarLum0dffA77XvtFERIJlZjScdtz9NKNhYJlOR3Yc+oSqiMgefHbkKIpj0WbrU57m5CHDAkiUPZW7iMgejOnTlyuOOIqSaIyiSISSaJSSaJQbTjqFyvLyoOPtVWB3YhIJA3eHxMuQXAKxYVB8zM4/5SUsph4zgc+NOojZy5cSi0Q5bdgI9u/WLehYLVK5i7SRp7fiGy+C1ErwFFgUooOg1/1YpEfQ8aQdDe3Zi6E9ewUdo1U0LCPSRr7tvyG5FLwGiDf8O7kM33pD0NFEVO4ibVY7C6hvsrIe6v4WRBqR3ajcRdosvYf1qYaxeJEAacxdOoSPyrCgTkaWHAfxp9m95CNQfFxh7UcB+bCmhtnLl2LAp4YM6/BXrARJ5S6B8vp/4VunQf3rQAle9gWs+3cwKws6Wous+7X4hvmQrgFqgDKIlGM9rg06Wij9/q0FXPv0bKLWMOBw7dOzueGkUzhnzNiAk3VMKncJjKfW4hsngX80R0cd1D6Kp1ZhvX4daLZsWHR/qPwHXvsYJBdB7CCs7LNYpGvQ0ULnvW1bufbp2cRTKSC1a/0P/vkkEwYdWBCXJuabyl0C4zX3gTc9IRmHxFw8uQyLDQ0kV2tYpAvW5YKgY4Te3955u9k84x95fOk7fPmwI/KapxDohKoEp34RkGi+3mKQXJb3ONJx1adTpDOcpE67U59KZfgOUblLcIrGAsXN13s9xIbnPY50XCcPGUYs0ryuIhbhlKEde46XoKjcJTBWfhFYCbvf7KsESiZgscEBpZKOaGTvSi497AhKYzEiQASjNBbj8iOrWvXJ0e2JBIs3fMjWeDx3YTsIjblLYCzaF3o/1PCJzsQrYGVQdh7W7VtBR5MO6D8mHMdnho/kr2//C8P47MhRHNx3v6y+N+3Oj59/lnvnzyMWiZBMp7jg4HH81/EnEc3wF0EYqNwlUBYbjvX6TdAxpECM67sf47Is9MbueK2a+xfMI55KEt85RP/wwjepKC3jm8dMaOeUHUM4/5clItLIna9VU9vkjkq1ySR3z3stoES5p3KXUPD6haQ3XkF63XGkN1yEx18OOpJ0IFvidRnXb0vEM16FEwYqdyl4nngD33AhJJ6G9AdQ/wq+6auka/8RdDTpIEZX9sm4fkSv3kRCOlWEyl0Knm/7EVDH7ve5rINtN2gCLwHgmhNOojQW23VdlgGlsRjTTvhUkLFySidUpfAlF2Ven17XMMe6dclvHulwjtx/AI+eN4lfvDKHRevXM7J3JVPGH8Mh+/ULOlrOqNyl8EV6Q6qm+XorASvNfx7pkEb36cttZ54VdIy80bCMFL4uVwJNZ5EshbIvYdb8zvUinYGO3KXgWdm5eHoT7PjfhhWegvLzsW5XBxtMJEAqdyl4ZoZ1vRzvcgmk1kKkEovoJg7SuancJTTMiiF2QNAxRDoEjbmLiISQyl1EJIRU7iIiIaRyFxEJIZW7iEgIqdxFREIoq3I3s9PMbLGZLTGz7+5lu3PNzM2sqv0iiohIa7VY7tbw+e1bgdOBMcAkMxuTYbtuwFWAJtIWEQlYNkfu44El7r7M3RPAg0Cm2Xd+CNxEw9yrIiISoGzKfQCwqtHy6p3rdjGzw4FB7v5YO2YTEZE2yqbcM92mZNcdEMwsAkwHvt3iE5ldbmbVZla9fv367FOKiEirZFPuq4FBjZYHAu83Wu4GjAWeNrMVwDHAzEwnVd39dnevcveqPn0y3/ZKRET2XTblPhcYYWZDzKwYuBCY+dGD7r7F3SvdfbC7DwZeAia6e3VOEouISItaLHd3TwJTgCeARcDv3f0tM7vezCbmOqCIiLReVlP+uvssYFaTddfsYdsT9z2WiIjsC31CVUQkhHSzDilonlwC8TkQ6QElJ2ORLkFHEgFgwboPeG3Ne/Qp78rJQ4ZSEstv3arcpSC5O771Wqj9E+BgUWAa9Pw1Vnx4wOmkM0um03z9rzN5YdW7pN2JRSKUxGI8eM4FDO/VO285NCwjhSk+G+r+TMMHouPgNeDb8U1X4p4KOp10Yr97cz4vrHqX2mSSeCrFjvp6NtXW8vW/zmz5m9uRyr3AJeL13Pm933Ju30v5XPfJXH/+z1i3MvwfEPOah8FrMzySgPrXc/e6nsTrniS97Wa85iE8vT1nryWF6cE351ObTO62zoHV27aycsvmvOXQsEyBu/bsHzP/mYUk6uoBeOEPL7PgmYXc9a8ZdOvZNeB0uZTYw3oDT+7hsX3j6e34xkmQWgVeg1MG234CvX6HFY3IyWtK4alPZ/7L0TCS6XTecujIvYAtX/AuC55btKvYAdJpp3Z7HY/f9VSAyXLPys4GyjI/WHxETl7Td9wGyeUNQ0AA1IJvw7e0OPOGdCJnjxpDSbT5cXPP0lKGVPTMWw6VewFbvmAlkWjztzBem+BfL78TQKI8Kj0Tio8CK9+5ohgoxXr8jIYPUmfgvvflltT+heZ/MTgkl+Lpja17LgmtLx92BCN796a8qAiAkmiMLkVFzDj9TMwyTdWVGxqWKWD9h/fD080Lqri0iMFjDwggUf6YxaDn7ZCYg8efg0hPrGwiFt0/8zdMmwabN8P06WDWUOxXXw0VFQ2PZWVvx0L5+6WVjq2sqIhHz/8i/1y+jJffW03/bt04a9RoepeXt/zN7UjlXsBGHTWcA0YPYNn8lSQTH48zx4pjnHn5KQEmyw+zCJQci5Ucu/cN3RuKfcaMhuXp0xuKfcYMmDq14fFsjqjKPg877gTijVZGoGg0Fsnfn9vS8cUiET49bDifHjY8sAwalilgZsaP/34Nn/zC0cSKY0SiEUaNH87Nz1xPr34qm13MGgp96tSGQo9EPi72j47ks3marldA0ZidQ0ExsC4Q6YX1uDm3+UXawLy1447tpKqqyqurNXFke0nWJ0mn0hSX7mG8WRqO0CONjmfS6ayL/eOncEi8DMk3IdIfSk/Z8xi/SA6Y2avu3uJ9qnXkHhKxopiKfW8+GmNv7OqrW31S1cywkmOwLpdhZWeo2KXDUrlL+H1U7B8NxaTTHw/RtKHgRQqBTqhK+Jk1XBXTeIx9+vSGxyoqWj00Ix3bnxcv4ucvvcja7dsZ0bs33zv2eD4xKNxXj2WiMXfpPJpeFZPtVTJSMH47fx7/8/wzu338vzQW466JX+CYgYP28p2FQ2PuIk01LXIVe6ik3fnZSy80m9elLpnkpheeDShVcFTuIhIK2+JxdiQyzzm0ZFPn+wSxyl1EQqFrcXHGOV0ABnbvnuc0wVO5i0goRCMRrqg6irImdzwqjcX41jEtfIo5hHS1jIiExterjiZqEf5f9SvsqE9QWV7O9z55AqcMDW4agKCo3EUkNMyMK6vGc/mRR5FIJSmJxvI6E2NHonIXkdCJmFEaKwo6RqA05i4iEkIqdxGREFK5i3QSH9bU8Ny7K1iycUPQUSQPNOYuEnLuzg3PPc0DC96gOBqlPp1mdGUffj3x81SU7uE+tFLwdOQuEnKPLHqLB9+cTzyVYlsiQV0yyZvrPuDqJ/4WdDTJIR25S+h44hV8+68gtQaKj8a6XoFF+wUdKzB3vf5qs/lW6tNp5qxeyea6Wh29h5TKXUIlXfNH2HotUNewonYFXvcXqPwzFh0QaLagbInXZVwfMWN7IqFyDykNy0houNfDtv9mV7EDkATfgW+/NahYgTvxwCHEMnyQp1txCf27db45VzoLlbuER2o1kMz0AMRfzHeaDmPq0RPoUVpGSTQKQNSMsliMG08+lUgn/fRmZ6BhGQmPSAV4pnIHon3ym6UD2a9rV5646GLumz+POatWckCPCi49/EgOquy8/006g6zK3cxOA2YAUeBOd/9Rk8evBL4BpIDtwOXuvrCds4rslUV64iXHQ/xZoNG83laGdbk8sFwdQa+ycqYePYGpR08IOorkSYvDMmYWBW4FTgfGAJPMbEyTzR5w93HufhhwE3BzuycVyYL1uAmKPwGUgHUFyqDLv2Olnw46mkheZXPkPh5Y4u7LAMzsQeAsYNeRubtvbbR9F0C3k5dAWKQr1usOPPUBpD+E2FDMdDWIdD7ZlPsAYFWj5dXA0U03MrNvAN8CioFPtUs6kTay6H4Q3S/oGCKByeZqmUyn05sdmbv7re4+DPgO8IOMT2R2uZlVm1n1+vXrW5dURESylk25rwYGNVoeCLy/l+0fBM7O9IC73+7uVe5e1aePztSLiORKNuU+FxhhZkPMrBi4EJjZeAMzG9Fo8UzgnfaLKCIirdXimLu7J81sCvAEDZdC3uXub5nZ9UC1u88EppjZKUA9sAm4OJehRURk77K6zt3dZwGzmqy7ptHXU9s5l4iI7ANNPyAiEkIqdxGREFK5i4iEkMpdRCSEQlHu6XSaRLw+6Bgisq/c974sWSvock/E67n1ql8zsdtkPtvlS1w29mreeOatoGOJSFtMmwZXX/1xobs3LE+bFmSqglXQ5f6TL9/KrF8/Rbw2gaeddxeu5vtn3sjyN1cGHU0kFFLpNL9b8AZnPXg/ZzxwL7e/Ope6ZA7+SnaHzZthxoyPC/7qqxuWN2/WEXwbmAf0H62qqsqrq6vb/P0b1mxi8rBvUF+3+w9aJGKcNOmTfPe+q/Y1okin9/W/zuSZd5fvusF2aSzGqN6VPHzeJGKRdj42bFzoH5k6FaZPB90xahcze9Xdq1rarmCP3NcuX0dxSVGz9em0s+LNVRm+Q0RaY+H6dbsVO0BdMsk7Gzfwz+XL2v8FzRqKvDEVe5sVbLkPHLk/9RlOokZjEUaNH75Pz73lw638csqdXND/q0we9g0euulPJOv3cPs2kZCqfv890hn+sq+pr2fO6hwMfX505N5Y4zF4aZWCLfceld059ZKTKCkv3m19cWkxF/yfs9r8vLU76vj6Ud9h1h1PsnHtZtYuX8d91z3MDy/QzaWkc6ks70IsEm22viQapV/Xbu37Yo2HZKZOhXS64d+Nx+ClVQr6BtlTfnkp/Qb34Q8zZrF90w7GTBjJ126+hP7D+rX5OWff/xxbP9xGsj61a128NkH1E2+w/M2VDBl7QHtEF+nwTh4ylOJolJr63W/gEI1E+Pzopnfa3EdmUFGx+xj7R0M0FRUammmDgj2hmis/mvwLZv/2uWbrS7uU8O+3XMapF5+Y/1AiAXlnwwau+Ouf+GD7dsyMrsXF/OK0zzJ+wMDcvKD77kXedFmyPqFa0EfuuTBgxP4UlRQ1G883M/Y7UDcYkc5lRO/ezJ58Kcs3byKZTjO8V28iuSzbps+tYm+zgh1zz5XTLzuZWNHu44zRWITe/Xsy7vjRAaUSCY6ZMbRnL0b2rsxtsUu7Urk3Udm/Fzc9eQ0DR/anqCRGrDjGuOPH8NN/Xkekva/r3YutG7axuHopWzduy9trikh4aFgmg4PGj+Duf81g0webKSopomtFl7y9diqZYsbX72D2/c8SK46RTCQ59eITmXLLV4hGm1+5ICKSicp9L3ruV5H317z3uod56oHnSNTVk9j56dt/3PcMvfv35KL/Oi/veQCWzFvOs4+8RCRinHD+BF0xJFIAdLVMB3N2z4vZsaWm2fruvbvy6Pq7857n7h/8jkd//ljDNA9mFBXH+NJ/ncuk734+71lEpBNMPxBG7k7N1tqMj23f3Lzwc235myt5dPpjxGsSpNNOOpUmXpvg/usfZs2yD/KeRyTfdiQS/HHRQu58rZo3PlgbdJxW0bBMB2JmDDvsQJa8vqLZYyOOHJr3PC/86RXqE82nXXCHOTOr+cI3z8x7JpF8eXPdB3zpDw+T9jSJVIpYJMJxBw7m1tM/RzSPF1e0VcdP2Ml84xdfoaS8hEik4ZKzSMQoKS/hGzMuzXuWWCy6K0djFjEiMf3oSHi5O1c89me2JeLsqK+nPp2mNpnkuXdX8OiiwrhnhH5DO5ixxx7EL1/6H44/fwIHjhnIiRd+kltevpHRR4/Ie5bjz/sEkViGK3Tc+eQXjs57HpF8WbzhQ7bE65qtr00meeitBQEkaj0Ny3RAQ8YewPcf+GbQMeg/rB9X/GQyv/qPezEzMMPTaa667atU9u8VdDyRnEm7s6ePa6UKZBIzlbvs1cSvn8aEs45izl9eJRIxJpx1VCCXiIrk00GVfSgrKmJH/e7TkJTFYpw7+uCAUrWOyl1aVDmgN5+78tSgY4jkTcSMW07/HJfO/ANpd+qSScqLijikbz/OP3hc0PGyonLvyDRDXmh5ai0kXgLrBiXHYVbc8jdJXo0fMJBnL7mMmYv/xfqaHRwzYBDHHnBgwcyvo3LvqKZNa7gx8EdzW390M4OKCt0NvsClt82AHXeAxQADYtDrbqxobNDRpIleZeVcctgRQcdoE10t0xGF9E7w8do4f/zlLL594rVc+/mbeO3J+UFHyjuPz4EddwEJ8BrwHeBb8E1fxT3V4veLZEtH7h1R47vQzJjx8d3gC/hO8Im6BFdN+D7vvbOGeE0CgFf/MZ9J3/s8X/r+OQGnyx+veRDI8Clkr4P6V6F4fN4zSTjpyL2jCtmd4J+8/zneX7J2V7EDxGvi/PaGR9m8fkuAyfLM9zSNhDUUvEg7Ubl3VCG7E/ycmXOp2xFvtr6oJMbCF98OIFEwrOxMoKz5A56CoiPznkfCS+XeEYXwTvAV+/XIOJWBu9OtV9cAEgWk9EwoOgSsfOeKKFAK3a/DIvm7b4CEX1Zj7mZ2GjCDhp/EO939R00e/xZwGZAE1gOXuvu77Zy18wjhneA/d+Wp/POB54nXfjwsYwZdK7py8LGjAkyWX2ZF0OtuiD+Fx2eDVWDl52Gx4UFHa1db43FeX/M+3UpKOKzf/gVz+WCYtDifu5lFgbeBTwOrgbnAJHdf2Gibk4CX3b3GzL4GnOjuF+zteTWfexZCdp373+6aza1X3U00FsHTTkXf7vzPrO8zcGT/oKNJO7rnjdf40fPPURyNkHanR2kp95x1DsN69Q46WihkO597NuX+CWCau39m5/L3ANz9xj1sfzhwi7sfu7fnVbl3TrU76lj8yhLKu5cx4oihDXPWSGi8tuZ9Jv/xYWqTH08VbUC/rt147stf1RF8O2jPm3UMAFY1Wl69c92efAX42x5CXW5m1WZWvX79+ixeWsKmrEsph500lpFHDlOxh9D98+dRl9z9HgAObI3X8dqa94MJ1UllU+6ZfgMzHu6b2UVAFfCTTI+7++3uXuXuVX369Mk+pexVfaKeNcs/oHaHLqWTYG2sq81YDmbG1njzq6Ukd7I5oboaGNRoeSDQ7H/BZnYK8H3gBHfXu5gnj9z8F+677mHS6TTptHPGZSdz5c8uJpppHnaRHPvM0OHMfW/1bsMyAPWpFFX9dW4ln7I5cp8LjDCzIdYwu9GFwMzGG+wcZ/8VMNHd17V/TMnkH/c9w2+ueYiabbXU7YiTqE3wt1/P5tf/97dBR5NO6gujD2Zoz16UxRqOG42GaXL/Y8JxdC8pDTZcJ9PiCVUAMzsD+DkNl0Le5e7/bWbXA9XuPtPMngTGAWt2fstKd5+4t+fUCdV9d+noqaxa3Hwcs6S8hD9t+g2xIs0uIfkXTyb5w6K3eHzpO/QsLeNLhxzKUf0HBh0rNLI9oZrVb7+7zwJmNVl3TaOvT2l1QtlnG9Zsyrg+VZ+ibkecrhUqd8m/kliMSeMOZdK4Q4OO0qnpE6oFbMSRQzOu717ZjS49yjM+JiKdg8q9gH31x5MpKS/Z7XNNJeXFfG36xbrMUKSTU7kXsFFVw/j58z9k/JlH0rt/T8Z+8iCu/9N3OPH8vX5+TEQ6gaxOqOaCTqiKiLRee35CVURECozKXUQkhFTuIiIhpAuhRSQra7dv46cvPs/TK5ZTXlzE5EMO59LDjiAa0TFiR6RyF5EWbamrY+KD97OptpaUOxvrapn+0gssXPcB0087M+h4koH+lysiLfrdm/PZHk+QanR1XV0yyeNL32Hlls0BJpM9UbmLSIvmvr+aulSy2fqiaJRFH+reDB2Ryl1EWjS0Zy+KMoytp9LOgG7dA0gkLVG5S2i4p0nvuJ/0+lNIf3A06U3fxJOrWv5GadG/HXI4scju9wgoikQY3qsXB/fpG1Aq2RuVu4SGb/0hbPsJpFaCb4L44/iGz+Mp3WJgXw3q0YN7zj6HwRU9KYpEKYpEOGHwEO45+xzNY9RB6WoZCQVPbYDah4FEo7Vp8Fq85h6s238GFS00qvoPYPbkL7OxtpbSWIwuxcVBR5K9ULlLOCQXg5WAJ5o8UA8JzWHUXsyM3uWaTroQaFhGwiE6MEOxA0QgNiTvcUSCpnKXULDYAVB8JNB0qKAYK780iEgigVK5S2hYxS1QegoNBV8MkQFYz9uwopFBRxPJO425S2hYpCtW8XPca8FrwXrqSg7ptFTuEjpmZWBlQccQCZSGZUREQkjlLiISQip3EZEQUrmLiISQyl1EJIRU7iIiIaRyFxEJIZW7iEgIqdxFREJI5S4iEkKafkBEQuu5lSu4Z97rbKqr5bRhI/jiuEM7zU1GsjpyN7PTzGyxmS0xs+9mePx4M3vNzJJmdm77x+y83J0dW2tI1je/87yI7Nltc1/mysf+zFMrlvH62jXc/NKLnP3Qb6mprw86Wl60WO5mFgVuBU4HxgCTzGxMk81WApcAD7R3wM7slb+9zr8Nm8I5lZdyVsXF/HLKnSTineMHU2RfbK6r5RevzKE2+fFBUTyV5P1tW3lk4ZsBJsufbI7cxwNL3H2ZuyeAB4GzGm/g7ivcfT6QzkHGTmnx3CVcf95PWbtiHalkikRtgsfv/ic3X3Zb0NFEOrzX166hKBpttr42meTJZUsDSJR/2ZT7AGBVo+XVO9dJDv3uxj+QqN39tnGJ2gTPPvISm9dvCSiVSGHoVVpG2r3Z+ogZfbt0CSBR/mVzQjXT3Q6a/1fL5onMLgcuBzjggAPa8hSdxqrF75PhZ5Oikhgfrt5IRZ8e+Q8l0kbbEwkeWPAGTyx9h56lZVx86OEcd+DgnL3eIfv1o095F1Zt3bJbyRdHo0w+5LCcvW5Hks2R+2pgUKPlgcD7bXkxd7/d3avcvapPnz5teYpOY9RRw4lEm789yUSS/YftF0Aikbapqa/n7Ifu5+cvvcjra9fw1IplXPnXmdw29+WcvaaZce/Z5zKkoidlsSK6FhdTFivi2uM/xaH99s/Z63Yk2Ry5zwVGmNkQ4D3gQuCLOU0lfPH/foHn//Aytdvrdq0rKS/hrCmn0aV7eYDJRFrn928t4P1t26hLfXxyszZZzy9emcOkcYdQUZqbu2YN6tGDv190CYs3fMjWeJxxffejrKgoJ6/VEbV45O7uSWAK8ASwCPi9u79lZteb2UQAMzvKzFYD5wG/MrO3chm6Mxg4sj8zXriBqlMPpaxbGf0G9+Xyn0zmshu/FHQ0kVaZvXwpdcnml/IWRaPMW7s2p69tZhxU2YfxAwZ2qmKHLD/E5O6zgFlN1l3T6Ou5NAzXSDsaMu5Abnz8B0HHENknfcq7YDQ/UZd2p2eZ7nWbK5p+QERy6uJDD6cktvtx5EdXrRzSV+ePckXlLiI5dWi//Zl2wqcandiMMaSiJ/eefS5mmS7Gk/aguWVEJOfOP3gcnxt5EAvWfUD3khJG9a5UseeYyl1E8qKsqIjxA3RqLl80LCMiEkIqdxGREFK5i4iEkMpdRCSEVO4iIiEUiqtlNq/fwt/veYY1S9cy9pOjOe7cYygu6VwfNRYRacw807yyeVBVVeXV1dX7/Dxvv7qU//zUdSR33tCitGsplf178suXbqRrReeYt1lEOg8ze9Xdq1raruCHZX40+RfUbKvddWOLuu11rF2xnvt/+EjAyUREglPQ5b5x7SbWLl/fbH0ykeSZh18MIJGISMdQ0OUejUXZ07BSUbHG3EWk8yrocu9R2Z2RVcOa3bGopKyYMy47OaBUIiLBK+hyB/j+A1OpHNCLsm5llJQVU1Jewrjjx3DOtz4bdDSRvHF33li7hr8vfYc127YFHUc6gIK/FLLvAX24d8ktVP/9Ddat/JCRVcMYVTUs6FgiebNux3Ym//ER3tu2lQhGIp3i/DFjue7EkzXzYidW8OUODWPvR59xRNAxRAIxZdZjLNu0kVSj80+PLlrIof3255zRBweYaiDbAAAEwklEQVSYTIJU8MMyIp3Zuh3bWbBu7W7FDg03oP7NvNcCSiUdgcpdpIBtTySIWOZf423xeJ7TSEeichcpYIMretIlw2W/RZEonxk2IoBE0lGo3EUKWMSMm045jdJYjOjOk6elsRh9upRzZdX4gNNJkEJxQlWkMztx8BD+cuFF3Dt/Hqu3buHYQQdy3pixdCspCTqaBEjlLhICw3r15roT9cE9+ZiGZUREQkjlLiISQip3EZEQUrmLiISQyl1EJIRU7iIiIaRyFxEJIZW7iEgIqdxFREJI5S4iEkIqdxGREDJvMsl/3l7YbD3wbiAvnhuVwIdBh8iRMO8bhHv/wrxvEO7929O+HejufVr65sDKPWzMrNrdq4LOkQth3jcI9/6Fed8g3Pu3r/umYRkRkRBSuYuIhJDKvf3cHnSAHArzvkG49y/M+wbh3r992jeNuYuIhJCO3EVEQkjl3kpmdpqZLTazJWb23QyPX2lmC8xsnpk9b2ZjgsjZFi3tW6PtzjUzN7OCuUohi/ftEjNbv/N9m2dmlwWRs62yee/M7HwzW2hmb5nZA/nO2FZZvHfTG71vb5vZ5iBytlUW+3eAmf3TzF43s/lmdkZWT+zu+ifLf4AosBQYChQDbwBjmmzTvdHXE4HHg87dXvu2c7tuwLPAS0BV0Lnb8X27BLgl6Kw53L8RwOtAz53LfYPO3V771mT7fwfuCjp3O793twNf2/n1GGBFNs+tI/fWGQ8scfdl7p4AHgTOaryBu29ttNgFKJSTGi3u204/BG4C6vIZbh9lu2+FKpv9+ypwq7tvAnD3dXnO2Fatfe8mAb/LS7L2kc3+OdB959c9gPezeWKVe+sMAFY1Wl69c91uzOwbZraUhhK8Kk/Z9lWL+2ZmhwOD3P2xfAZrB1m9b8A5O//sfcTMBuUnWrvIZv9GAiPN7AUze8nMTstbun2T7XuHmR0IDAGeykOu9pLN/k0DLjKz1cAsGv46aZHKvXUsw7pmR+bufqu7DwO+A/wg56nax173zcwiwHTg23lL1H6yed/+Agx290OAJ4F7cp6q/WSzfzEahmZOpOHo9k4zq8hxrvaQ1e/cThcCj7h7Kod52ls2+zcJ+I27DwTOAO7b+fu4Vyr31lkNND6iG8je/0R6EDg7p4naT0v71g0YCzxtZiuAY4CZBXJStcX3zd03uHt85+IdwJF5ytYesvm5XA382d3r3X05sJiGsu/oWvM7dyGFNSQD2e3fV4DfA7j7HKCUhnln9krl3jpzgRFmNsTMimn4YZrZeAMza/wLcybwTh7z7Yu97pu7b3H3Sncf7O6DaTihOtHdq4OJ2yrZvG/7N1qcCCzKY7591eL+AX8CTgIws0oahmmW5TVl22Szb5jZKKAnMCfP+fZVNvu3EjgZwMxG01Du61t64lg7Bw01d0+a2RTgCRrOct/l7m+Z2fVAtbvPBKaY2SlAPbAJuDi4xNnLct8KUpb7dpWZTQSSwEYarp4pCFnu3xPAqWa2EEgB/+nuG4JLnZ1W/FxOAh70nZeUFIos9+/bwB1mdjUNQzaXZLOf+oSqiEgIaVhGRCSEVO4iIiGkchcRCSGVu4hICKncRURCSOUuIhJCKncRkRBSuYuIhND/B+yJsaJWgZA5AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "model1=MyGMM(3)\n",
    "model1.fit(X)\n",
    "result=model1.predict(X)\n",
    "plt.scatter(X[:,0],X[:,1],c=result)\n",
    "plt.scatter(model1.mu[:,0],model1.mu[:,1],marker='x',color='red');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div><img src=\"./img/0905.png\", width=700>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "nbTranslate": {
   "displayLangs": [
    "*"
   ],
   "hotkey": "alt-t",
   "langInMainMenu": true,
   "sourceLang": "en",
   "targetLang": "fr",
   "useGoogleTranslate": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
